﻿import re

# Patterns are compiled once, outside the per-line loop. Each has exactly one
# capture group, so ``findall`` yields a flat list of strings.
# NOTE(review): the status pattern accepts any three-digit run that follows
# whitespace, starts with 2-6 and has 0 or 1 as its second digit, so it can
# also match the beginning of a longer number elsewhere on the line — confirm
# against the actual log format.
status_re = re.compile(r'\s([2-6][0-1]\d)')
url_re = re.compile(
    r'\s"(htt\w+.\/\/[0-2a-z]\w+.\w+.\w+.\w+.\w+.\w+.\w+)')

url_arr = []  # every URL match, in file order
sta_arr = []  # every status-code match, in file order

# ``with`` guarantees the file is closed even if reading or matching raises;
# iterating the file object streams it line by line instead of loading the
# whole log into memory.
with open("access_2021_02_22.log", "r") as f:
    for line in f:
        sta_arr.extend(status_re.findall(line))
        url_arr.extend(url_re.findall(line))


def Count(c_list):
    """Print the total number of items in ``c_list`` and a per-item tally.

    A single ``print`` call writes the total count followed by a dict that
    maps each distinct item to its number of occurrences. Keys appear in the
    order in which each item is first seen.

    Args:
        c_list: Sequence of hashable items to tally (may be empty).

    Returns:
        None. The result is only written to standard output.
    """
    dist = {}
    # One pass: a plain dict keeps first-seen key order, so the printed
    # mapping is ordered by first occurrence.
    for item in c_list:
        dist[item] = dist.get(item, 0) + 1

    print("these are the total of %s !" % (sum(dist.values())),
          "\ndetail is ", dist)


# Print each heading followed by the frequency table for its list:
# URLs first, then status codes.
for heading, values in (("The url:", url_arr), ("The status:", sta_arr)):
    print(heading)
    Count(values)

"""
Nicely done. However, it is recommended to use a `with` statement when opening the file.
"""
